# ax_cuda.m4: An m4 macro to detect and configure Cuda
#
# Copyright © 2008 Frederic Chateau <frederic.chateau@cea.fr>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
#


#
# SYNOPSIS
#	AX_CUDA()
#
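# DESCRIPTION
#	Checks for the existence of the Cuda compiler (nvcc), headers and
#	libraries.
#	Options:
#	--with-cuda=(path|yes|no)
#		Indicates whether to use Cuda or not, and the path of a non-standard
#		installation location of Cuda if necessary.
#	--enable-cuda-fast-math
#		Adds -use_fast_math to NFLAGS (fast, less precise math functions).
#	--enable-emu
#		Adds -deviceemu to NFLAGS (device emulation).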
#
#	This macro calls:
#		AC_SUBST(CUDA_CFLAGS)
#		AC_SUBST(CUDA_LIBS)
#		AC_SUBST(NVCC)
#		AC_SUBST(NFLAGS)
#
AC_DEFUN([AX_CUDA],
[
# --with-cuda: "no" disables detection, "yes" (the default) relies on PATH
# and /usr/local/cuda, any other value is taken as the Cuda install prefix.
AC_ARG_WITH([cuda],
    [AS_HELP_STRING([--with-cuda@<:@=yes|no|DIR@:>@], [prefix where cuda is installed (default=yes)])],
[
	with_cuda=$withval
	if test "$withval" = "no"
	then
		want_cuda="no"
	elif test "$withval" = "yes"
	then
		want_cuda="yes"
	else
		want_cuda="yes"
		cuda_home_path=$withval
	fi
],
[
	want_cuda="yes"
])

# --enable-cuda-fast-math: request -use_fast_math in NFLAGS.
AC_ARG_ENABLE([cuda-fast-math],
    [AS_HELP_STRING([--enable-cuda-fast-math], [Turn on fast, less precise math functions in CUDA])],
    [case "${enableval}" in
        yes) CUDA_FAST_MATH=true ;;
        no)  CUDA_FAST_MATH=false ;;
        *)   AC_MSG_ERROR([bad value ${enableval} for --enable-cuda-fast-math]) ;;
    esac],
    [CUDA_FAST_MATH=false]
)

# --enable-emu: request -deviceemu in NFLAGS.
AC_ARG_ENABLE([emu],
    [AS_HELP_STRING([--enable-emu], [Turn on device emulation for CUDA])],
    [case "${enableval}" in
        yes) EMULATION=true ;;
        no)  EMULATION=false ;;
        *)   AC_MSG_ERROR([bad value ${enableval} for --enable-emu]) ;;
    esac],
    [EMULATION=false]
)

# Stays "no" when Cuda support is disabled with --without-cuda.
have_cuda="no"

if test "$want_cuda" = "yes"
then
	# Locate nvcc. A prefix given with --with-cuda is searched before PATH
	# so that the explicitly requested toolkit wins over any other nvcc.
	if test -n "$cuda_home_path"
	then
		nvcc_search_dirs="$cuda_home_path/bin$PATH_SEPARATOR$PATH"
	else
		nvcc_search_dirs=$PATH
	fi

	AC_PATH_PROG([NVCC], [nvcc], [], [$nvcc_search_dirs])
	if test -n "$NVCC"
	then
		have_nvcc="yes"
	else
		have_nvcc="no"
	fi

	# Report the nvcc release number (informational only, no minimum version
	# is enforced). Skipped when nvcc is missing so that no bogus command
	# is executed.
	if test "$have_nvcc" = "yes"
	then
		AC_MSG_CHECKING([for nvcc version])
		NVCC_VERSION=`$NVCC --version 2>/dev/null | grep release | awk 'gsub(/,/, "") {print [$]5}'`
		AC_MSG_RESULT([${NVCC_VERSION:-unknown}])
	fi

	# Root of the Cuda installation used for headers and libraries.
	if test -n "$cuda_home_path"
	then
		ax_cuda_root=$cuda_home_path
	else
		ax_cuda_root=/usr/local/cuda
	fi

	# Library sub-directory: lib64 (layout used since CUDA 8.0) is preferred,
	# plain lib is used only when it exists and lib64 does not.
	# A private variable is used on purpose: the name "libdir" is reserved
	# for the installation directory selected with --libdir.
	if test -d "$ax_cuda_root/lib64" || test ! -d "$ax_cuda_root/lib"
	then
		ax_cuda_libdir=lib64
	else
		ax_cuda_libdir=lib
	fi

	# set CUDA flags
	CUDA_CFLAGS="-I$ax_cuda_root/include"
	CUDA_LIBS="-L$ax_cuda_root/$ax_cuda_libdir -lcudart"

	# Env var CUDA_DRIVER_LIB_PATH can be used to set an alternate driver
	# library path. This is useful when building on a host where only the
	# toolkit (nvcc) is installed and not the driver: the driver libraries
	# must then be placed in the location given by this variable.
	# Plain assignment is used instead of "+=", which is not POSIX shell.
	if test -n "$CUDA_DRIVER_LIB_PATH"
	then
		CUDA_LIBS="$CUDA_LIBS -L$CUDA_DRIVER_LIB_PATH -lcuda"
	else
		CUDA_LIBS="$CUDA_LIBS -lcuda"
	fi

	# Probe headers and libraries with the Cuda flags temporarily added.
	saved_CPPFLAGS=$CPPFLAGS
	saved_LIBS=$LIBS

	CPPFLAGS="$CPPFLAGS $CUDA_CFLAGS"
	LIBS="$LIBS $CUDA_LIBS"

	AC_LANG_PUSH([C])
	AC_MSG_CHECKING([for Cuda headers])
	AC_COMPILE_IFELSE(
	[AC_LANG_PROGRAM([@%:@include <cuda.h>], [])],
	[
		have_cuda_headers="yes"
		AC_MSG_RESULT([yes])
	],
	[
		have_cuda_headers="no"
		AC_MSG_RESULT([not found])
	])

	# The program body below is already placed inside main by the test
	# program generator, so it must contain statements only. Two stable
	# driver entry points are referenced so that the link step really
	# depends on the driver library.
	AC_MSG_CHECKING([for Cuda libraries])
	AC_LINK_IFELSE(
	[AC_LANG_PROGRAM([@%:@include <cuda.h>],
	[
		int driver_version = 0;
		cuInit(0);
		cuDriverGetVersion(&driver_version);
	])],
	[
		have_cuda_libs="yes"
		AC_MSG_RESULT([yes])
	],
	[
		have_cuda_libs="no"
		AC_MSG_RESULT([not found])
	])
	AC_LANG_POP([C])

	CPPFLAGS=$saved_CPPFLAGS
	LIBS=$saved_LIBS

	# Cuda is usable only when compiler, headers and libraries were all found.
	if test "$have_cuda_headers" = "yes" && test "$have_cuda_libs" = "yes" && test "$have_nvcc" = "yes"
	then
		have_cuda="yes"
	else
		have_cuda="no"
		AC_MSG_ERROR([Cuda is requested but not available])
	fi
fi

# Extra nvcc flags selected by the --enable options above, appended to any
# NFLAGS value provided by the user.
if test "x$EMULATION" = xtrue
then
	NFLAGS="$NFLAGS -deviceemu"
fi

if test "x$CUDA_FAST_MATH" = xtrue
then
	NFLAGS="$NFLAGS -use_fast_math"
fi
AC_MSG_NOTICE([Using NFLAGS=$NFLAGS])

AC_SUBST([CUDA_CFLAGS])
AC_SUBST([CUDA_LIBS])
AC_SUBST([NVCC])
AC_SUBST([NFLAGS])
])
